In [ ]:
from sqlalchemy import create_engine
from sqlalchemy import inspect
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as dates
import time
import datetime
import numpy as np
import mpld3 as mpl
In [ ]:
dbfile = '/home/nugentb7/Documents/Fall21/InformationVisualization/DuckDuckGoose/scripts/waterways.db'
cnx = create_engine('sqlite:///'+dbfile)
inspector = inspect(cnx)
for table_name in inspector.get_table_names():
    print(table_name)
In [ ]:
chemical_df = pd.read_sql_table('chemical',cnx)
chemical_df = chemical_df.dropna() # Dropping chemical id 107
location_df = pd.read_sql_table('location',cnx)
unit_of_measure_df = pd.read_sql_table('unit_of_measure',cnx)
waterway_reading_df = pd.read_sql_table('waterway_reading',cnx)
waterway_reading_df.drop(waterway_reading_df[waterway_reading_df['chemical_id'] == 107].index, inplace=True) # Dropping msmts with chem id 107
waterway_reading_df['julian_date'] = pd.DatetimeIndex(waterway_reading_df['sample_date']).to_julian_date()
waterway_reading_df['pydate'] = pd.DatetimeIndex(waterway_reading_df['sample_date']).to_pydatetime()
waterway_reading_df.sort_values(by=['julian_date'])
In [ ]:
# Plot methylosmolene readings over time (ID=64)
molene = waterway_reading_df[waterway_reading_df.chemical_id==64]
plt.plot_date(dates.date2num(molene.pydate),molene.value,'bo')
In [ ]:
# Map msmt units to waterway reading
chemical_id = pd.unique(waterway_reading_df.chemical_id)
waterway_reading_df['msmt_unit_id'] = np.zeros_like(waterway_reading_df.chemical_id.values)
waterway_reading_df['norm_reading'] = np.zeros_like(waterway_reading_df.chemical_id.values)
for id in chemical_id:
    assoc_msmt_id = chemical_df[chemical_df['id']==id].unit_of_measure_id.values[0]
    waterway_reading_df.loc[waterway_reading_df['chemical_id']==id,'msmt_unit_id']=assoc_msmt_id
    waterway_reading_df.loc[waterway_reading_df['chemical_id']==id,'norm_reading']=waterway_reading_df.loc[waterway_reading_df['chemical_id']==id,'value']\
        / waterway_reading_df.loc[waterway_reading_df['chemical_id']==id,'value'].mean()
In [ ]:
waterway_reading_df.norm_reading
In [ ]:
 
In [ ]:
# Plot data with each unique measurement unit, normalized to individual chemical reading data
# and per location
msmt_units = unit_of_measure_df.id.values
locations  = location_df.id
enable_datatip=False
for location in locations:
    waterway_reading_df_lim = waterway_reading_df[waterway_reading_df['location_id']==location]
    for unit in msmt_units:
        fig,ax = plt.subplots(1,figsize=(12,6))
        chemicals = waterway_reading_df_lim[waterway_reading_df_lim['msmt_unit_id']==unit].chemical_id
        scatter = ax.scatter(waterway_reading_df_lim[waterway_reading_df_lim['msmt_unit_id']==unit].pydate,waterway_reading_df_lim[waterway_reading_df_lim['msmt_unit_id']==unit].norm_reading,c=chemicals)
        unit_str = unit_of_measure_df[unit_of_measure_df['id']==unit].unit_name.values
        unit_str = unit_str[0].decode("utf-8")
        loc_str  = location_df[location_df['id']==location].display.values
        loc_str = loc_str[0]
        plt.title(loc_str)
        plt.xlabel('Sample Date')
        plt.ylabel('Msmt Reading (' + unit_str + ')')
        if enable_datatip:
            tooltip = mpl.plugins.PointLabelTooltip(scatter,labels=chemicals.to_string())
            mpl.plugins.connect(fig,tooltip)
            mpl.enable_notebook()
/tmp/ipykernel_104526/2251087418.py:9: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  fig,ax = plt.subplots(1,figsize=(12,6))
In [ ]:
unit_str
In [ ]: